## This script creates an IGV file of coordinated read counts
## Argument 1: TA site file
## Argument 2: Staph genes file
## Argument 3: Counts file 
## >> python igv_staph.py Staph_TA saouhsc.txt hopcount.tabular > output.igv

import sys

## Build a table of TA sites, annotate each site with gene names, add the
## read counts that map onto it, and print the table as IGV rows.

## Chromosome label written in the first column of every output row.
CHROMOSOME = 'SAOHSC'

## Which columns of the counts file carry read counts, and where the matching
## TA site is looked for relative to the position in column index 1.  Each
## entry is (column index, offsets tried in order); the first offset that
## lands on a known TA site receives the count.
## NOTE(review): columns 4 and 5 look like per-strand counts, with the site
## downstream / upstream of the read position -- confirm against the format
## of the counts file.
COUNT_COLUMNS = (
    (4, (15, 14)),
    (5, (-15, -16)),
)


def load_ta_sites(lines):
    """Return ``{position: [0]}`` for every TA row found in *lines*.

    Rows are split on whitespace.  A row is used when its first field is
    the literal ``'TA'``; its fifth field is read as the integer position.
    Blank lines are skipped instead of raising IndexError.  The single
    list element is the read-count accumulator, starting at zero.
    """
    sites = {}
    for line in lines:
        fields = line.split()
        if fields and fields[0] == 'TA':
            sites[int(fields[4])] = [0]
    return sites


def annotate_genes(sites, lines):
    """Append each gene name to every TA site lying inside that gene.

    *lines* are tab-separated rows of ``name, start, end``; both ends of
    the interval are inclusive.  Blank lines are skipped.  A site covered
    by several rows collects several names, in file order.  *sites* is
    modified in place and also returned.
    """
    for line in lines:
        if not line.strip():
            continue
        fields = line.split('\t')
        gene = fields[0]
        first = int(fields[1])
        last = int(fields[2])
        for position in range(first, last + 1):
            if position in sites:
                sites[position].append(gene)
    return sites


def add_read_counts(sites, lines):
    """Add the read counts in *lines* to the accumulators in *sites*.

    *lines* are tab-separated rows.  Rows with fewer than six fields, or
    whose second field is not all digits (e.g. a header), are ignored.
    For every column listed in COUNT_COLUMNS a non-zero count is added to
    the first candidate position (row position + offset) that is a known
    TA site; a count with no matching site is dropped.  *sites* is
    modified in place and also returned.
    """
    for line in lines:
        fields = line.rstrip('\n').split('\t')
        if len(fields) < 6 or not fields[1].isdigit():
            continue
        position = int(fields[1])
        for column, offsets in COUNT_COLUMNS:
            count = int(fields[column])
            if count == 0:
                continue
            for offset in offsets:
                target = position + offset
                if target in sites:
                    sites[target][0] += count
                    break
    return sites


def format_igv_rows(sites):
    """Return the output lines for *sites*, ordered by position.

    Each line holds the chromosome label, the site position, position + 2,
    the accumulated count and, when the site has one, the first gene name
    attached to it.
    """
    ## Fields are joined with ' \t' (space + tab): byte-for-byte what a
    ## Python 2 ``print a, '\t', b`` statement emits.
    ## NOTE(review): plain tabs are probably what IGV wants -- confirm
    ## before changing the output format.
    separator = ' \t'
    rows = []
    for position in sorted(sites):
        entry = sites[position]
        columns = [CHROMOSOME, str(position), str(position + 2), str(entry[0])]
        if len(entry) > 1:
            columns.append(str(entry[1]))
        rows.append(separator.join(columns) + '\n')
    return rows


def build_site_table(ta_path, genes_path, counts_path):
    """Read the three input files and return the finished site table.

    Every file is opened in a ``with`` block so its handle is closed as
    soon as it has been consumed, and the counts file is streamed line by
    line rather than loaded whole.
    """
    with open(ta_path) as handle:
        sites = load_ta_sites(handle)
    with open(genes_path) as handle:
        annotate_genes(sites, handle)
    with open(counts_path) as handle:
        add_read_counts(sites, handle)
    return sites


if __name__ == '__main__':
    TAsites = build_site_table(sys.argv[1], sys.argv[2], sys.argv[3])
    sys.stdout.writelines(format_igv_rows(TAsites))